author: “Journ Galvan” date: “4/23/2020” output: html_document —
knitr::opts_chunk$set(echo = TRUE)
# Load libraries
library(here)
library(tidyverse)
library(vegan)
library(reshape)
library(ggplot2)
library(ggpubr)
library(zoo)
library(sjPlot)
library(sjmisc)
library(sjlabelled)
library(psych)
library(knitr)
library(faraway)
library(car)
library(MASS)
library(gridExtra)
library(grid)
#Load data
branch_width <- read.csv(here("photogrammetry_data/branchwidth_data.csv"))
coral_pg <- read.csv(here("photogrammetry_data/galvan_journ_datasheet_v2.csv"))
coral_field <- read.csv(here("photogrammetry_data/field_experiment_colony_measurements_moorea_summer2019.csv"))
cafi_surveys <- read.csv(here("cafi_data/revised_cafi_data_moorea_summer2019_11_27.csv"))
cafi_field <- read.csv(here("cafi_data/prelim_cafi_counts_moorea_summer2019.csv"))
updated_cafi <- read.csv(here("cafi_data/cafi_data_w_taxonomy_summer2019_2020_5_21.csv"))
Data was filtered to only include invertebrates identified in coral colonies. These were identified down to the species level. Species richness, abundance and Shannon weiner diversity index was calculated.
#Incorporate cleaned CAFI data
updated_cafi2 <- updated_cafi %>% filter(str_detect(coral_id, "^FE")) %>%
dplyr::select(master_sort, coral_id, code, type, search_term, lowest_level, phylum, genus, species, general_notes) %>%
subset(phylum!="Chordata")
#Calculate CAFI richness, abundance, and diversity (shannon weiner) for each coral
cafi_summarized2 <- group_by(updated_cafi2, coral_id) %>%
summarise(num_cafi = n(), cafi_richness = length(unique(code)), cafi_present = paste(sort(unique(code)), collapse = ";"))
cafi_summarized2$sw <- updated_cafi2 %>%
count(code, coral_id = coral_id) %>%
spread(code,n) %>%
mutate_all(list(~tidyr::replace_na(.,0))) %>%
dplyr::select(-coral_id) %>%
diversity(index = "shannon")
Cleaned data of coral colonies taken from the field and using photogrammetry techniques were combined into one dataframe. Many more and accurate morphological measurements could be acquired using photogrammetry than with traditional measurements taken *in situ. Common measurements of volume, surface area and available space known as interstitial space were calculated of each coral colonly.
Available space was traditionally measured using 2D measurements of five randomly selected branch distances and averaging them. In this experiment, we took methods used by Neil E. Doszpot et al. to calculate a 3D measurement of available space using convex hull geometry and the software estimated coral volume.
Because we did not take random branch distance measurements in the field, we used the software models to randomly select branch distances to be averaged.
Three other measurements were calculated from the coral models: convexity and sphericity which capture volume compactness and packing which captures how much of an objects surface area is situated internally versus externally.
Wide or tight branching corals were initially qualitatively determined. However, classifying corals with convexity values \(>= 0.5\) as “tight” and \(< 0.5\) as “wide” offers another method of classifying coral morphology. For the rest of these tests, we will classify “wide” and “tight” branching corals using this method.
# Clean field data
coral_field2 <- coral_field %>%
dplyr::rename(branch = branch_width)
# Clean photogrammetry morphometric data
coral_pg$volume_pg <- as.numeric(as.character(coral_pg$volume_pg)) # Convert factor to numeric
# Clean photogrammetry branch width data and take averages of branch distance for each coral
branch_width$branch_distance_mm <- as.numeric(as.character(branch_width$branch_distance_mm)) # Convert factor to numeric
branch_w_summarized <- group_by(branch_width, coral_id) %>%
summarise(avg_w_cm = sum(branch_distance_mm/10)/n(), #take average branch distance and convert mm to cm
measurements = length(unique(replicate_measurement)),
locations = paste(sort(unique(location)), collapse = ";"))
#Create dataframe called coral_dim
coral_dim <- merge(coral_pg, branch_w_summarized, by = "coral_id") %>%
merge(coral_field2, by = "coral_id") %>%
mutate(volume_pg=volume_pg*10^6,
max_hull_volume=max_hull_volume*10^6, #convert m^3 to cm^3
max_hull_surface_area=max_hull_surface_area*10^4,
surface_area=surface_area*10^4) %>% #convert m^2 to cm^2
dplyr::rename(max_hull_SA = max_hull_surface_area,
SA = surface_area)
coral_dim$interstitial_space <- coral_dim$max_hull_volume - coral_dim$volume_pg #calculate available space by subtracting software estimated volume from convex hull volume
coral_dim$SAV <- coral_dim$SA / coral_dim$volume_pg #calculate surface area to volume relationship
coral_dim$convexity <- coral_dim$volume_pg / coral_dim$max_hull_volume #calculate proportion occupied, high ratios indicate less free space between branches
coral_dim$packing <- coral_dim$SA / coral_dim$max_hull_SA #how much of an objects surface area is situated internally
coral_dim$sphericity <- ((3.14^(1/3))*((6*coral_dim$volume_pg)^(2/3)))/coral_dim$SA #calculate sphericity or how close the object is to a sphere
coral_dim <- coral_dim %>%
dplyr::select(coral_id, branch, cafi, size_class, volume_field, volume_pg, SA, avg_w_cm, interstitial_space, SAV, convexity, packing, sphericity) %>%
filter(cafi=="empty")
coral_dim$branch <- ifelse(coral_dim$convexity>=0.5, "tight","wide") #classifies wide and tight branching coral based on convexity measurement
Dataframes were combined and seperate columns created for log and square root transformations.
#Create dataframe called cafi_coral and merge coral and cafi data
cafi_coral <- merge(cafi_summarized2, coral_dim, by ="coral_id") %>%
drop_na(interstitial_space) %>%
dplyr::select(coral_id, branch, cafi, size_class, volume_field, volume_pg, SA, avg_w_cm, interstitial_space, SAV, convexity, packing, sphericity, num_cafi, cafi_present, cafi_richness, sw)
#Log transform variables
cafi_coral$log_num_cafi <- log(cafi_coral$num_cafi)
cafi_coral$log_volume_field <- log(cafi_coral$volume_field)
cafi_coral$log_volume_pg <- log(cafi_coral$volume_pg)
cafi_coral$log_avg_w <- log(cafi_coral$avg_w_cm)
cafi_coral$log_interstitial_space <- log(cafi_coral$interstitial_space)
cafi_coral$log_SA <- log(cafi_coral$SA)
cafi_coral$log_cafi_richness <- log(cafi_coral$cafi_richness)
cafi_coral$log_sphericity <- log(cafi_coral$sphericity)
#Sqrt transform variables
cafi_coral$sqrt_num_cafi <- sqrt(cafi_coral$num_cafi)
cafi_coral$sqrt_volume_pg <- sqrt(cafi_coral$volume_pg)
cafi_coral$sqrt_interstitial_space <- sqrt(cafi_coral$interstitial_space)
cafi_coral$sqrt_SA <- sqrt(cafi_coral$SA)
cafi_coral$sqrt_cafi_richness <- sqrt(cafi_coral$cafi_richness)
cafi_coral$sqrt_sphericity <- sqrt(cafi_coral$sphericity)
Non-transformed and transformed morphological measurements correlated using *pairs.panels().
#Visualize correlations between variables
cor_var <- cafi_coral %>%
dplyr::select(num_cafi,volume_pg,volume_field,SA,avg_w_cm,interstitial_space,sphericity,convexity,packing)
pairs.panels(cor_var,
method="pearson",
hist.col = "white",
density=TRUE,
ellipses=FALSE,
scale=TRUE)
#Use log transformed data
cor_var2 <- cafi_coral %>%
dplyr::select(log_num_cafi,log_volume_pg,log_volume_field,log_SA,log_avg_w,log_interstitial_space,log_sphericity,convexity,packing)
pairs.panels(cor_var2,
method="pearson",
hist.col = "white",
density=TRUE,
ellipses=FALSE,
scale=TRUE)
Log transformed field and software estimated volume and log transformed software estimated SA and interstitial space are all highly correlated to eachother. Packing is also highly correlated to volume and SA.
Here, we want to compare the differences between our software and manual measurements of volume, height, length and width. Manual measurements for coral volume were estimated from an elipsoid.
dim_pg <- coral_pg %>%
dplyr::select(coral_id, height_pg, length_pg, width_pg, volume_pg) %>%
mutate(height_pg = height_pg*100,
length_pg = length_pg*100,
width_pg = width_pg*100,
volume_pg = volume_pg*10^6)
dim_field <- coral_field %>%
dplyr::select(coral_id, height_field, length_field, width_field, volume_field)
dim_bind <- merge(dim_pg, dim_field, by="coral_id") %>%
drop_na()
Since the corals are not independent, that is, they were taken on the same coral but with different methods, we will consider a paired t-test with dependent samples. Height, length and width measurements are also subjective since they may have been taken from different areas on the coral. We will first check for normallity assumptions. Normality will be checked with a qqplot.
par(mfrow=c(3,3))
qqPlot(dim_bind$height_pg, dist="norm")
## [1] 1 58
qqPlot(dim_bind$height_field, dist="norm")
## [1] 1 56
qqPlot(dim_bind$length_pg, dist="norm")
## [1] 6 57
qqPlot(dim_bind$length_field, dist="norm")
## [1] 6 57
qqPlot(dim_bind$width_pg, dist="norm")
## [1] 19 1
qqPlot(dim_bind$width_field, dist="norm")
## [1] 1 23
par(mfrow=c(2,2))
par(mfrow=c(2,2))
qqPlot(dim_bind$volume_pg, dist="norm")
## [1] 58 23
qqPlot(dim_bind$volume_field, dist="norm")
## [1] 1 36
par(mfrow=c(1,1))
#Volume data is not normal, try sqrt transformation
dim_bind$sqrt_volume_pg <- sqrt(dim_bind$volume_pg)
dim_bind$sqrt_volume_field <- sqrt(dim_bind$volume_field)
par(mfrow=c(2,2))
qqPlot(dim_bind$sqrt_volume_pg, dist="norm")
## [1] 58 23
qqPlot(dim_bind$sqrt_volume_field, dist="norm")
## [1] 1 36
par(mfrow=c(1,1))
#sqrt transformed volume for both measurements is normal, move to paired t-test
t.test(dim_bind$sqrt_volume_pg,dim_bind$sqrt_volume_field, paired=TRUE)
##
## Paired t-test
##
## data: dim_bind$sqrt_volume_pg and dim_bind$sqrt_volume_field
## t = -8.7044, df = 58, p-value = 4.091e-12
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -30.32779 -18.98700
## sample estimates:
## mean of the differences
## -24.65739
#ddply(dim_bind, .(sqrt_volume_pg,sqrt_volume_field), summarize,
t.test(dim_bind$height_pg,dim_bind$height_field, paired=TRUE)
##
## Paired t-test
##
## data: dim_bind$height_pg and dim_bind$height_field
## t = -5.1446, df = 58, p-value = 3.32e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.835609 -1.247079
## sample estimates:
## mean of the differences
## -2.041344
t.test(dim_bind$length_pg,dim_bind$length_field, paired=TRUE)
##
## Paired t-test
##
## data: dim_bind$length_pg and dim_bind$length_field
## t = -0.93279, df = 58, p-value = 0.3548
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.0463083 0.3811287
## sample estimates:
## mean of the differences
## -0.3325898
t.test(dim_bind$width_pg,dim_bind$width_field, paired=TRUE)
##
## Paired t-test
##
## data: dim_bind$width_pg and dim_bind$width_field
## t = -3.7184, df = 58, p-value = 0.0004538
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.973207 -0.592183
## sample estimates:
## mean of the differences
## -1.282695
We reject the null hypothesis that the true difference in means is equal to zero for all measurements except for length.
g1 <- ggplot(cafi_coral, aes(x=log_volume_pg, y=log_SA, shape=branch, color=branch, linetype=branch))+
geom_point()+
geom_smooth(method=lm, se=TRUE, aes(fill=branch))+
theme_classic()+
theme(axis.title.x=element_blank(),
legend.position="none")+
labs(y=expression(paste("Log SA cm"^{2})))
g2 <- ggplot(cafi_coral, aes(x=log_volume_pg, y=log_avg_w, shape=branch, color=branch, linetype=branch))+
geom_point()+
geom_smooth(method=lm, se=TRUE, aes(fill=branch))+
theme_classic()+
theme(axis.title.x=element_blank(),
legend.position="none")+
labs(y="Log branch distance cm")
g3 <- ggplot(cafi_coral, aes(x=log_volume_pg, y=log_interstitial_space, shape=branch, color=branch, linetype=branch))+
geom_point()+
geom_smooth(method=lm, se=TRUE, aes(fill=branch))+
theme_classic()+
theme(axis.title.x=element_blank(),
legend.position="none")+
labs(y=expression(paste("Log space cm"^{3})))
g4 <- ggplot(cafi_coral, aes(x=log_volume_pg, y=log_sphericity, shape=branch, color=branch, linetype=branch))+
geom_point()+
geom_smooth(method=lm, se=TRUE, aes(fill=branch))+
theme_classic()+
theme(axis.title.x=element_blank(),
legend.position="none")+
labs(y="Log sphericity")
g5 <- ggplot(cafi_coral, aes(x=log_volume_pg, y=convexity, shape=branch, color=branch, linetype=branch))+
geom_point()+
geom_smooth(method=lm, se=TRUE, aes(fill=branch))+
theme_classic()+
theme(axis.title.x=element_blank(),
legend.position="none")+
labs(y="Convexity", x=expression(paste("Log Vol. cm"^{3})))
g6 <- ggplot(cafi_coral, aes(x=log_volume_pg, y=packing, shape=branch, color=branch, linetype=branch))+
geom_point()+
geom_smooth(method=lm, se=TRUE, aes(fill=branch))+
theme_classic()+
theme(axis.title.x=element_blank(),
legend.position="bottom")+
labs(y="Packing")
#extract legend
#https://github.com/hadley/ggplot2/wiki/Share-a-legend-between-two-ggplot2-graphs
g_legend<-function(a.gplot){
tmp <- ggplot_gtable(ggplot_build(a.gplot))
leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
legend <- tmp$grobs[[leg]]
return(legend)}
mylegend<-g_legend(g6)
## `geom_smooth()` using formula 'y ~ x'
fig1 <- ggarrange(g1,g2,g3,g4,g5,g6,
ncol = 3, nrow=2)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
annotate_figure(fig1,
top = text_grob("Morphological Measurements", color = "black", face = "bold", size = 14),
bottom = text_grob(expression(paste("Log Vol. cm"^{3})), color = "black",
face = "bold", size = 12),
fig.lab = "Figure 1", fig.lab.face = "bold")
The below graphs represent how invertebrate abundance relates to both volume measurements. We will test how well our software estimated method of calculating volume and interstitial space relates to invertebrate counts. We will first assess \(p<0.05\) to test if the linear regression slope was significantly different from zero. Correlation of dependent and independent variables \(R^2\) was also included in the tables.
pg_field <- ggplot(cafi_coral, aes(x=volume_pg, y=volume_field))+
geom_point()+
geom_smooth(method=lm, se=TRUE)+
scale_y_continuous(breaks=seq(0,max(cafi_coral$volume_field), by = 5000))+
scale_x_continuous(breaks=seq(0,max(cafi_coral$volume_pg), by = 2000))+
theme_bw()+
labs(y= expression(paste("elipsoid V cm"^{3})), x= expression(paste("software V cm"^{3})))
pg_field
## `geom_smooth()` using formula 'y ~ x'
invert_field <- ggplot(cafi_coral, aes(x=volume_field, y=num_cafi))+
geom_point()+
geom_smooth(method=lm, se=TRUE)+
theme_bw()+
labs(y="invert abundance", x= expression(paste("elipsoid V cm"^{3})))
invert_field
## `geom_smooth()` using formula 'y ~ x'
lm_invert_field <- lm(num_cafi~volume_field, data=cafi_coral)
#HTML table from regression results
pl <- c(
'(Intercept)' = "Intercept",
volume_field = "elipsoid volume")
tab_model(lm_invert_field,
pred.labels = pl,
dv.labels = "Invertebrate abundance")
| Invertebrate abundance | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| Intercept | 13.69 | 7.36 – 20.03 | <0.001 |
| elipsoid volume | 0.00 | 0.00 – 0.00 | 0.043 |
| Observations | 29 | ||
| R2 / R2 adjusted | 0.143 / 0.111 | ||
log_invert_field <- ggplot(cafi_coral, aes(x=log_volume_field, y=log_num_cafi))+
geom_point()+
geom_smooth(method = lm, se=TRUE)+
theme_classic()+
#facet_wrap(~branch)+
labs(y="log invert. abundance", x= expression(paste("log elipsoid V cm"^{3})))
log_invert_field
## `geom_smooth()` using formula 'y ~ x'
lm_log_invert_field <- lm(log_num_cafi~log_volume_field, data=cafi_coral)
par(mfrow=c(2,2))
plot(lm_log_invert_field)
par(mfrow=c(1,1))
#HTML table from regression results
pl <- c(
'(Intercept)' = "Intercept",
log_volume_field = "Log elispoid V"
)
tab_model(lm_log_invert_field,
pred.labels = pl,
dv.labels = "Invertebrate abundance")
| Invertebrate abundance | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| Intercept | 0.31 | -0.92 – 1.54 | 0.606 |
| Log elispoid V | 0.28 | 0.14 – 0.43 | <0.001 |
| Observations | 29 | ||
| R2 / R2 adjusted | 0.372 / 0.348 | ||
invert_pg <- ggplot(cafi_coral, aes(x=volume_pg, y=num_cafi))+
geom_point()+
geom_smooth(method=lm, se=TRUE)+
theme_bw()+
labs(y="invert abundance", x= expression(paste("software V cm"^{3})))
invert_pg
## `geom_smooth()` using formula 'y ~ x'
lm_invert_pg <- lm(num_cafi~volume_pg, data=cafi_coral)
#HTML table from regression results
pl <- c(
'(Intercept)' = "Intercept",
volume_pg = "software est. volume")
tab_model(lm_invert_pg,
pred.labels = pl,
dv.labels = "Invertebrate abundance")
| Invertebrate abundance | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| Intercept | 11.55 | 5.54 – 17.55 | 0.001 |
| software est. volume | 0.00 | 0.00 – 0.00 | 0.004 |
| Observations | 29 | ||
| R2 / R2 adjusted | 0.275 / 0.248 | ||
#Create graphs of log transformed invertebrate abundance
log_invert_pg <- ggplot(cafi_coral, aes(x=log_volume_pg, y=log_num_cafi))+
geom_point()+
geom_smooth(method = lm, se=TRUE)+
theme_classic()+
#facet_wrap(~branch)+
labs(y="log invert. abundance", x= expression(paste("log software est. V cm"^{3})))
log_invert_pg
## `geom_smooth()` using formula 'y ~ x'
#Residuals for log transformed data
lm_log_invert_pg <- lm(log_num_cafi~log_volume_pg, data=cafi_coral)
par(mfrow=c(2,2))
plot(lm_log_invert_pg)
par(mfrow=c(1,1))
#HTML table from regression results
pl <- c(
'(Intercept)' = "Intercept",
log_volume_pg = "Log software est. V"
)
tab_model(lm_log_invert_pg,
pred.labels = pl,
dv.labels = "Invertebrate abundance")
| Invertebrate abundance | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| Intercept | 0.44 | -0.67 – 1.56 | 0.424 |
| Log software est. V | 0.30 | 0.15 – 0.44 | <0.001 |
| Observations | 29 | ||
| R2 / R2 adjusted | 0.391 / 0.369 | ||
When comparing elipsoid volume to software estimated volume we see significant p-values for both as represented in the above tables. Software estimated volume measurements and invert. abundance show a much lower p value indicating a greater slope from zero as well as having a higher correlation \(R^2\) value. When data was log transformed, there was little to no difference in p-values or correlation.
Software estimated available space calculated from convex hull and coral volume was related to invertebrate abundance. The same was repeated for the traditional method of average branch distance.
Dependent and independent variables were also compared after being log transformed.
branch_pg <- ggplot(cafi_coral, aes(x=interstitial_space, y=num_cafi))+
geom_point()+
geom_smooth(method=lm, se=TRUE)+
theme_bw()+
labs(y="invert abundance", x= expression(paste("IS cm"^{3})))
branch_pg
## `geom_smooth()` using formula 'y ~ x'
lm_branch_pg <- lm(num_cafi~interstitial_space, data=cafi_coral)
#HTML table from regression results
pl <- c(
'(Intercept)' = "Intercept",
interstitial_space = "IS")
tab_model(lm_branch_pg,
pred.labels = pl,
dv.labels = "Invertebrate abundance")
| Invertebrate abundance | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| Intercept | 16.01 | 9.85 – 22.17 | <0.001 |
| IS | 0.00 | -0.00 – 0.00 | 0.250 |
| Observations | 29 | ||
| R2 / R2 adjusted | 0.049 / 0.013 | ||
log_branch_pg <- ggplot(cafi_coral, aes(x=log_interstitial_space, y=log_num_cafi))+
geom_point()+
geom_smooth(method=lm, se=TRUE)+
theme_bw()+
labs(y="Log invert abundance", x= expression(paste("Log IS cm"^{3})))
log_branch_pg
## `geom_smooth()` using formula 'y ~ x'
lm_log_branch_pg <- lm(log_num_cafi~log_interstitial_space, data=cafi_coral)
#HTML table from regression results
pl <- c(
'(Intercept)' = "Intercept",
log_interstitial_space = "Log IS")
tab_model(lm_log_branch_pg,
pred.labels = pl,
dv.labels = "Invertebrate abundance")
| Invertebrate abundance | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| Intercept | 0.56 | -0.59 – 1.70 | 0.329 |
| Log IS | 0.28 | 0.13 – 0.43 | 0.001 |
| Observations | 29 | ||
| R2 / R2 adjusted | 0.354 / 0.330 | ||
branch_field <- ggplot(cafi_coral, aes(x=avg_w_cm, y=num_cafi))+
geom_point()+
geom_smooth(method=lm, se=TRUE)+
theme_bw()+
labs(y="invert abundance", x= "avg_w_cm")
branch_field
## `geom_smooth()` using formula 'y ~ x'
lm_branch_field <- lm(num_cafi~avg_w_cm, data=cafi_coral)
#HTML table from regression results
pl <- c(
'(Intercept)' = "Intercept",
avg_w_cm = "Avg. branch width")
tab_model(lm_branch_field,
pred.labels = pl,
dv.labels = "Invertebrate abundance")
| Invertebrate abundance | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| Intercept | 17.06 | 5.45 – 28.66 | 0.006 |
| Avg. branch width | 0.60 | -5.02 – 6.21 | 0.829 |
| Observations | 29 | ||
| R2 / R2 adjusted | 0.002 / -0.035 | ||
log_branch_field <- ggplot(cafi_coral, aes(x=log_avg_w, y=log_num_cafi))+
geom_point()+
geom_smooth(method=lm, se=TRUE)+
theme_bw()+
labs(y="Log invert abundance", x= "Log avg. width")
log_branch_field
## `geom_smooth()` using formula 'y ~ x'
lm_log_branch_field <- lm(log_num_cafi~log_avg_w, data=cafi_coral)
#HTML table from regression results
pl <- c(
'(Intercept)' = "Intercept",
log_avg_w = "Log avg. width")
tab_model(lm_log_branch_field,
pred.labels = pl,
dv.labels = "Invertebrate abundance")
| Invertebrate abundance | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| Intercept | 2.54 | 2.10 – 2.98 | <0.001 |
| Log avg. width | 0.23 | -0.42 – 0.87 | 0.476 |
| Observations | 29 | ||
| R2 / R2 adjusted | 0.019 / -0.017 | ||
Log transformed data appeared to not violate normality assumptions so was used in this analysis. Relating interstitial space to invertebrate abundance gave a signficant p-value while average width did not. The difference in p-values and \(R^2\) correlation values were significant. This may provide evidence to average branch width not adequately capturing available space for invertebrates when compared to a 3D software estimated measurement.
Visualize data using boxplots.
# Boxplot of invert abundance
log_invert_box <- ggplot(cafi_coral, aes(x= branch, y=log_num_cafi, color = branch))+
geom_boxplot(width = 0.5)+
geom_jitter()+ #adds data on top of box plot
scale_color_manual(values = c("firebrick1", "steelblue2"))+
scale_y_continuous(limits= c(0, max(cafi_coral$log_num_cafi + 1)))+
theme_classic()+
labs(y="Log invert. abundance", x="Coral branching", title = "Log invert. abundance boxplot")
log_invert_box
# Boxplot of invertebrate species richness per coral for both tight and wide branching
richness_box <- ggplot(cafi_coral, aes(x= branch, y=log_cafi_richness, color = branch))+
geom_boxplot(width = 0.5)+
geom_jitter()+
scale_color_manual(values = c("firebrick1", "steelblue2"))+
scale_y_continuous(limits= c(0, max(cafi_coral$log_cafi_richness + 1)))+
theme_classic()+
labs(y="Invert. sp. richness", x="Coral branching", title = "Invert. sp. richness boxplot")
richness_box
# Boxplot of invertebrate diversity for both tight and wide branching coral
diversity_box <- ggplot(cafi_coral, aes(x= branch, y=sw, color = branch))+
geom_boxplot(width = 0.5)+
geom_jitter()+
scale_color_manual(values = c("firebrick1", "steelblue2"))+
scale_y_continuous(limits= c(0, max(cafi_coral$sw + 0.5)), breaks = c(0, 0.5, 1, 1.5, 2, 2.5, 3))+
theme_classic()+
labs(y="Invert. diversity", x="Coral branching", title = "Invert. diversity boxplot")
diversity_box
Data was log transformed except for diversity so as not to violate normality assumptions. It appears that there is no difference in abundance, species richness or diversity.
Before conducting a two sample t-test we checked for normality using qq-plots and Shapiro Wilk test and a Levene’s test for equal variance.
#Q-Q plot separated by group
with(cafi_coral, qqPlot(num_cafi[branch == "wide"], dist="norm"))
## [1] 13 14
with(cafi_coral, qqPlot(num_cafi[branch == "tight"], dist="norm"))
## [1] 12 6
#Shapiro Wilk seperated by group
with(cafi_coral, shapiro.test(num_cafi[branch == "wide"]))
##
## Shapiro-Wilk normality test
##
## data: num_cafi[branch == "wide"]
## W = 0.92775, p-value = 0.1995
with(cafi_coral, shapiro.test(num_cafi[branch == "tight"]))
##
## Shapiro-Wilk normality test
##
## data: num_cafi[branch == "tight"]
## W = 0.76166, p-value = 0.003547
With a \(p>0.05\) we fail to reject the null hypothesis that our data is normally distributed for wide branching corals. With a \(p<0.05\) we reject the null hypothesis that our data is normally distributed for tight branching corals. Because we are working with smaller data sets, we cannot use the central limit theorem.
#Levene's test: response variable, group variable
leveneTest(cafi_coral$num_cafi, cafi_coral$branch)
## Warning in leveneTest.default(cafi_coral$num_cafi, cafi_coral$branch):
## cafi_coral$branch coerced to factor.
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.2017 0.6569
## 27
With a \(p>0.05\) we fail to reject the null hypothesis that our data is normally distributed for tight and wide branching corals. Both groups of corals that have tight and wide branching morphologies have equal variance.
Because tight branching coral is not normally distributed, we could either perform a transformation or non-parametric test. We will try log transforming our data first. A column log transforming the number of inverts has already been created
#Q-Q plot separated by group
with(cafi_coral, qqPlot(log_num_cafi[branch == "wide"], dist="norm"))
## [1] 8 6
with(cafi_coral, qqPlot(log_num_cafi[branch == "tight"], dist="norm"))
## [1] 12 6
#Shapiro Wilk seperated by group
with(cafi_coral, shapiro.test(log_num_cafi[branch == "wide"]))
##
## Shapiro-Wilk normality test
##
## data: log_num_cafi[branch == "wide"]
## W = 0.93506, p-value = 0.2642
with(cafi_coral, shapiro.test(log_num_cafi[branch == "tight"]))
##
## Shapiro-Wilk normality test
##
## data: log_num_cafi[branch == "tight"]
## W = 0.88255, p-value = 0.09449
#Levene's test: response variable, group variable
leveneTest(cafi_coral$log_num_cafi, cafi_coral$branch)
## Warning in leveneTest.default(cafi_coral$log_num_cafi, cafi_coral$branch):
## cafi_coral$branch coerced to factor.
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.4491 0.5084
## 27
# subset data
tight <- subset(cafi_coral, branch == "tight")
wide <- subset(cafi_coral, branch == "wide")
t.test(tight$log_num_cafi, wide$log_num_cafi, var.equal = TRUE)
##
## Two Sample t-test
##
## data: tight$log_num_cafi and wide$log_num_cafi
## t = 0.9573, df = 27, p-value = 0.3469
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.3005325 0.8262331
## sample estimates:
## mean of x mean of y
## 2.81695 2.55410
With a \(p>0.05\) we fail to reject the null hypothesis that the difference between the means of the log transformed invert counts are different from zero.
Fit a single variable Yi=β0+β1Xi+ϵi or multivariable linear model Y=β0+β1X1+β2X2+…+βnXn+ϵ to our data using different combinations of explanatory variables. Our response variable will be abundance of invertebrates. We will use AIC and R-squared values to determine the best fit model.
#Visualize correlations between variables
# subset data
log_tight <- cafi_coral %>%
subset(branch == "tight") %>%
dplyr::select(log_num_cafi,log_volume_pg,log_SA,log_interstitial_space,log_sphericity,convexity,packing)
pairs.panels(log_tight,
method="pearson",
hist.col = "white",
density=TRUE,
ellipses=FALSE,
scale=TRUE)
log_wide <- cafi_coral %>%
subset(branch == "wide") %>%
dplyr::select(log_num_cafi,log_volume_pg,log_SA,log_interstitial_space,log_sphericity,convexity,packing)
pairs.panels(log_wide,
method="pearson",
hist.col = "white",
density=TRUE,
ellipses=FALSE,
scale=TRUE)
log_cafi_coral <- cafi_coral %>%
dplyr::select(log_num_cafi,log_volume_pg,log_SA,log_interstitial_space,log_sphericity,convexity,packing)
pairs.panels(log_cafi_coral,
method="pearson",
hist.col = "white",
density=TRUE,
ellipses=FALSE,
scale=TRUE)
plot1 <- ggplot(cafi_coral, aes(x=log_volume_pg, y=log_num_cafi))+
geom_point()+
geom_smooth(method=lm, se=TRUE)+
theme_classic()+
theme(axis.title.y=element_blank(),
legend.position="none")+
labs(x=expression(paste("Log software est. V cm"^{3})))
plot2 <- ggplot(cafi_coral, aes(x=log_volume_field, y=log_num_cafi))+
geom_point()+
geom_smooth(method=lm, se=TRUE)+
theme_classic()+
theme(axis.title.y=element_blank(),
legend.position="none")+
labs(x=expression(paste("Log elipsoid V cm"^{3})))
plot3 <- ggplot(cafi_coral, aes(x=log_sphericity, y=log_num_cafi))+
geom_point()+
geom_smooth(method=lm, se=TRUE)+
theme_classic()+
theme(axis.title.y=element_blank(),
legend.position="none")+
labs(x="Log sphericity")
plot4 <- ggplot(cafi_coral, aes(x=packing, y=log_num_cafi))+
geom_point()+
geom_smooth(method=lm, se=TRUE)+
theme_classic()+
theme(axis.title.y=element_blank(),
legend.position="none")+
labs(x="Packing")
fig3 <- ggarrange(plot1,plot2,plot3,plot4,
ncol = 2, nrow=2)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
annotate_figure(fig3,
top = text_grob("Morphological Measurements", color = "black", face = "bold", size = 14),
left = text_grob("Log invert. abundance", color = "black",
rot = 90, face = "bold", size = 12),
fig.lab = "Figure 1", fig.lab.face = "bold")
#Linear model for log sphericity and invert abundance
lm_sphericity <- lm(log_num_cafi~log_sphericity, data=cafi_coral)
par(mfrow=c(2,2))
plot(lm_sphericity)
par(mfrow=c(1,1))
#HTML table from regression results
pl <- c(
'(Intercept)' = "Intercept",
log_sphericity = "Log sphericity"
)
tab_model(lm_sphericity,
pred.labels = pl,
dv.labels = "Invertebrate abundance")
| Invertebrate abundance | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| Intercept | 1.19 | -0.18 – 2.56 | 0.085 |
| Log sphericity | -1.21 | -2.33 – -0.10 | 0.033 |
| Observations | 29 | ||
| R2 / R2 adjusted | 0.157 / 0.126 | ||
#Linear model for packing and invert abundance
lm_packing <- lm(log_num_cafi~packing, data=cafi_coral)
par(mfrow=c(2,2))
plot(lm_packing)
par(mfrow=c(1,1))
#HTML table from regression results
pl <- c(
'(Intercept)' = "Intercept",
packing = "Packing"
)
tab_model(lm_packing,
pred.labels = pl,
dv.labels = "Invertebrate abundance")
| Invertebrate abundance | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| Intercept | 0.34 | -1.08 – 1.76 | 0.626 |
| Packing | 1.21 | 0.48 – 1.94 | 0.002 |
| Observations | 29 | ||
| R2 / R2 adjusted | 0.300 / 0.274 | ||
#Linear model for convexity and invert abundance
lm_convexity <- lm(log_num_cafi~convexity, data=cafi_coral)
par(mfrow=c(2,2))
plot(lm_convexity)
par(mfrow=c(1,1))
#HTML table from regression results
pl <- c(
'(Intercept)' = "Intercept",
convexity = "Convexity"
)
tab_model(lm_convexity,
pred.labels = pl,
dv.labels = "Invertebrate abundance")
| Invertebrate abundance | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| Intercept | 2.42 | 1.16 – 3.68 | 0.001 |
| Convexity | 0.50 | -2.04 – 3.05 | 0.689 |
| Observations | 29 | ||
| R2 / R2 adjusted | 0.006 / -0.031 | ||
Log transformation appears normal. Log transformed volume, surface area, interstitial space, and packing were morphological measurements that had a positive correlation to number of cafi. Log transformed sphericity has a negative correlation to cafi. Because our data does not contain any 0 counts for invertebrate (meaning every coral contained an invertebrate population) we applied a log transformation to our count data.
Linear models will be fit to all corals as well as subset to tight and wide branching morphotypes.
VIF>10 will be removed from the dataset
lm1 <- lm(log_num_cafi~log_volume_pg+log_sphericity+packing, data=log_cafi_coral)
vif(lm1)
## log_volume_pg log_sphericity packing
## 3.808732 1.768799 3.823104
lm_tight <- lm(log_num_cafi~log_volume_pg+packing, data=log_tight)
vif(lm_tight)
## log_volume_pg packing
## 4.568216 4.568216
lm_wide <- lm(log_num_cafi~log_volume_pg+log_sphericity+packing, data=log_wide)
vif(lm_wide)
## log_volume_pg log_sphericity packing
## 6.346522 4.847754 3.009344
res1=lm1$residuals
#Test normality assumption on residuals
par(mfrow=c(2,2))
plot(lm1)
par(mfrow=c(1,1))
hist(res1)
shapiro.test(res1)
##
## Shapiro-Wilk normality test
##
## data: res1
## W = 0.93436, p-value = 0.07139
#Data is FINALY! normal
We fail to reject the null hypothesis that our residuals are normally distributed.
A step AIC will determine a parsimonious model
fullmodel <- lm(log_num_cafi~log_volume_pg+log_sphericity+packing, data=log_cafi_coral)
nullmodel <- lm(log_num_cafi~1, data=log_cafi_coral)
stepAIC(fullmodel, scope = c(upper = fullmodel, lower = nullmodel), direction = "both")
## Start: AIC=-25.93
## log_num_cafi ~ log_volume_pg + log_sphericity + packing
##
## Df Sum of Sq RSS AIC
## - log_sphericity 1 0.00066 9.0009 -27.929
## - packing 1 0.01376 9.0140 -27.887
## <none> 9.0003 -25.931
## - log_volume_pg 1 1.30926 10.3095 -23.993
##
## Step: AIC=-27.93
## log_num_cafi ~ log_volume_pg + packing
##
## Df Sum of Sq RSS AIC
## - packing 1 0.01311 9.0140 -29.887
## <none> 9.0009 -27.929
## - log_volume_pg 1 1.36787 10.3688 -25.826
##
## Step: AIC=-29.89
## log_num_cafi ~ log_volume_pg
##
## Df Sum of Sq RSS AIC
## <none> 9.014 -29.887
## - log_volume_pg 1 5.7912 14.805 -17.497
##
## Call:
## lm(formula = log_num_cafi ~ log_volume_pg, data = log_cafi_coral)
##
## Coefficients:
## (Intercept) log_volume_pg
## 0.4415 0.2958
fullmodel_tight <- lm(log_num_cafi~log_volume_pg+log_sphericity+packing, data=log_tight)
nullmodel_tight <- lm(log_num_cafi~1, data=log_tight)
stepAIC(fullmodel_tight, scope = c(upper = fullmodel_tight, lower = nullmodel_tight), direction = "both")
## Start: AIC=-8.29
## log_num_cafi ~ log_volume_pg + log_sphericity + packing
##
## Df Sum of Sq RSS AIC
## - log_sphericity 1 0.17419 3.2618 -9.6314
## - packing 1 0.21902 3.3067 -9.4676
## - log_volume_pg 1 0.52475 3.6124 -8.4065
## <none> 3.0876 -8.2900
##
## Step: AIC=-9.63
## log_num_cafi ~ log_volume_pg + packing
##
## Df Sum of Sq RSS AIC
## - packing 1 0.06174 3.3236 -11.4064
## - log_volume_pg 1 0.45069 3.7125 -10.0784
## <none> 3.2618 -9.6314
##
## Step: AIC=-11.41
## log_num_cafi ~ log_volume_pg
##
## Df Sum of Sq RSS AIC
## <none> 3.3236 -11.406
## - log_volume_pg 1 0.93218 4.2557 -10.440
##
## Call:
## lm(formula = log_num_cafi ~ log_volume_pg, data = log_tight)
##
## Coefficients:
## (Intercept) log_volume_pg
## 1.2875 0.1917
fullmodel_wide <- lm(log_num_cafi~log_volume_pg+log_sphericity+packing, data=log_wide)
nullmodel_wide <- lm(log_num_cafi~1, data=log_wide)
stepAIC(fullmodel_wide, scope = c(upper = fullmodel_wide, lower = nullmodel_wide), direction = "both")
## Start: AIC=-12.39
## log_num_cafi ~ log_volume_pg + log_sphericity + packing
##
## Df Sum of Sq RSS AIC
## - log_sphericity 1 0.03663 5.1602 -14.268
## - packing 1 0.08969 5.2132 -14.094
## <none> 5.1235 -12.389
## - log_volume_pg 1 0.72665 5.8502 -12.135
##
## Step: AIC=-14.27
## log_num_cafi ~ log_volume_pg + packing
##
## Df Sum of Sq RSS AIC
## - packing 1 0.08039 5.2405 -16.005
## <none> 5.1602 -14.268
## - log_volume_pg 1 1.08120 6.2414 -13.034
##
## Step: AIC=-16.01
## log_num_cafi ~ log_volume_pg
##
## Df Sum of Sq RSS AIC
## <none> 5.2405 -16.0054
## - log_volume_pg 1 4.823 10.0635 -6.9131
##
## Call:
## lm(formula = log_num_cafi ~ log_volume_pg, data = log_wide)
##
## Coefficients:
## (Intercept) log_volume_pg
## -0.06089 0.36422
We will compare our log transformed poisson general linear model with a quasi-poisson general linear model with non-log transformed counts.